import re
import time

import requests


def get_one_page(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the script forever.

    Returns:
        The decoded response text when the server answers with HTTP 200,
        otherwise ``None`` (non-200 status or any request-level failure).
    """
    headers = {
        # Present a desktop browser User-Agent instead of the library default.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36',
    }
    # IP proxy.
    # NOTE(review): only the 'http' scheme is mapped here, while the caller in
    # this file requests an https:// URL -- confirm whether an 'https' entry
    # is also wanted.
    proxy = {
        'http': '139.199.153.25:1080',
    }
    try:
        response = requests.get(
            url, headers=headers, proxies=proxy, timeout=timeout
        )
    except requests.RequestException:
        # Best effort: connection errors, timeouts, bad URLs etc. are reported
        # to the caller as "no page" rather than raised.
        return None
    if response.status_code == 200:
        return response.text
    return None


def parse_one_page(html):
    """Extract the first movie entry from the HTML of a board page.

    Every ``<dd>`` entry matched by the pattern is printed as a list of raw
    tuples; the first one is converted to a dictionary and returned.

    Args:
        html: Page source as a string, or ``None`` when the download failed.

    Returns:
        A dict with the keys ``index``, ``image``, ``title``, ``actor``,
        ``time`` and ``score`` for the first matched entry, or ``None`` when
        *html* is ``None`` or contains no matching entry.
    """
    if html is None:
        # The downloader signals failure with None; re.findall would raise
        # TypeError on it, so treat it as "nothing to parse".
        return None

    pattern = re.compile(
        '<dd>.*?board-index.*?>(.*?)</i>.*?data-src="(.*?)".*?name.*?a*?title='
        '"(.*?)".*?star.*?>(.*?)</p>.*?releasetime.*?>(.*?)</p>.*?integer.*?>(.*?)'
        '</i>.*?fraction.*?>(.*?)</i>.*?</dd>', re.S
    )
    items = pattern.findall(html)
    print(items)
    if not items:
        return None

    # Build the record.
    # NOTE(review): only the first match is returned (this was the effective
    # behaviour of the original `return` inside the loop); the remaining
    # entries are visible only in the printed list above.
    index, image, title, star, release, integer, fraction = items[0]
    return {
        'index': index,
        'image': image,
        'title': title.strip(),
        # Drop the 3-character label in front of the cast list; slicing a
        # shorter string simply yields ''.
        'actor': star.strip()[3:],
        # Drop the 5-character label in front of the release date.
        'time': release.strip()[5:],
        # The score is split into integer and fractional parts in the markup.
        'score': integer.strip() + fraction.strip(),
    }


def main(offset):
    """Fetch and parse the board page that starts at *offset*.

    Args:
        offset: Value placed in the ``offset`` query parameter of the board
            URL.
    """
    # Request the page for this offset.
    url = 'https://maoyan.com/board/4?offset=' + str(offset)
    html = get_one_page(url)
    if html is None:
        # The download failed or returned a non-200 status; skip this page
        # instead of handing None to the parser and aborting the whole run.
        print('Failed to fetch page:', url)
        return
    parse_one_page(html)


if __name__ == '__main__':
    # Walk the ten board pages (offsets 0, 10, ..., 90), pausing one second
    # after each page.
    for offset in range(0, 100, 10):
        main(offset)
        time.sleep(1)
